*_____________________________________________________________________________________________________________________________________________________
*
**# LABELLING
*_____________________________________________________________________________________________________________________________________________________

* Descriptive labels for the core identifier, timing, age, self-rated health and work variables
label variable inw      "Whether responded to wave"
label variable mergeid  "Unique ID"
label variable wave     "Survey wave"
label variable riwy     "Survey year (from interview date; see also YEAR)"
label variable ragey_e  "Age in years at end of interview"
label variable ragey    "Age in years at interview"
label variable rshlt    "SRH Excellent-Poor"
label variable rshlta   "SRH Very Good-Very Bad (ELSA w1 w3)"
label variable rwld     "Work-limiting disability"
label variable rwork    "Recent work status (RAND version)"
* Male indicator built from ragender (1 -> 1, 2 -> 0); the source variable is removed afterwards
recode ragender (1=1 "1_male") (2=0 "0_female"), generate(rmale)
label variable rmale    "Gender is male (vs female)"
drop ragender

// Country
* Names the numeric country codes, drops country 30, and then builds three derived identifiers:
*   country2       - countries renumbered consecutively
*   country_waveN  - one code per observed country x wave combination
*   survey_waveN   - one code per survey + two-digit wave string
label var cid 		"Country-language identifier"
label define countrynew 11 "Austria" 12 "Germany" 13 "Sweden" 14 "Netherlands" 15 "Spain" 16 "Italy" 17 "France" 18 "Denmark" 19 "Greece" 	///
	20 "Switzerland" 23 "Belgium" 25 "Israel" 28 "Czechia" 29 "Poland" 30 "Ireland" 31 "Luxembourg" 32 "Hungary" 33 "Portugal" 		///
	34 "Slovenia" 35 "Estonia" 47 "Croatia" 51 "USA" 61 "England"
label values country countrynew
drop if country==30			// Tiny Irish sample
* A consecutive version is needed later on
* (decode to the country name, then encode the name back to a fresh numeric variable with its own value label)
decode country, gen(countrystring)
encode countrystring, gen(country2)
	label var country2 "Country with consecutive numbering"
	order country2, after(country)
drop countrystring
* Country-wave identifiers
egen country_waveN = group(country wave), label
* Prefix the value labels of all three label sets with their numeric code followed by an underscore
numlabel country_waveN countrynew country2, mask(#_) add 
label var country_waveN "Country-wave consecutive identifier"
* Survey-wave identifiers
* The string is the survey name followed by the wave formatted with %02.0f; it is only a stepping stone to the encoded version
gen survey_wave = survey + strofreal(wave, "%02.0f")
encode survey_wave, gen(survey_waveN)
	numlabel survey_waveN, mask(#_) add
	label var survey_waveN "Survey-wave combined identifier"
	drop survey_wave
* Regimes are at the bottom, as done over many lines to save time


// Unique ID in numeric form
* Builds mergeidnum, a numeric copy of the string ID mergeid.
*   ELSA/HRS: mergeid is copied as-is and then destringed (assumes these IDs contain digits only - TODO confirm)
*   SHARE:    the first two characters of mergeid are replaced by a numeric code (via encode), followed by
*             characters 4-9 and 11-12 of mergeid
capture drop mergeidnum
gen surveyarea = substr(mergeid, 1, 2) if survey=="SHARE"		// For Switzerland and Belgium, this is NOT the same as the country code...
encode surveyarea, gen(surveyareacode)
clonevar mergeidnum = mergeid if inlist(survey, "ELSA", "HRS")
replace mergeidnum = string(surveyareacode, "%2.0f") + substr(mergeid, 4, 6) + substr(mergeid, 11, 2) if survey=="SHARE"
destring mergeidnum, replace
* Tidying up
* (the wildcard removes both helper variables, surveyarea and surveyareacode)
drop surveyarea*
order mergeidnum, after(mergeid)
	label var mergeidnum "Unique ID in numeric format (for TSSET)"

	
// ELSA-SHARE versions of health vars
* Mobility / motor-skill items
label variable rwalk100a "Motor skills: walk 100m/one block"
label variable rsita     "Motor skills: sit for 2hrs"
label variable rchaira   "Motor skills: get up from chair"
label variable rclimsa   "Motor skills: climb several flights of stairs"
label variable rclim1a   "Motor skills: climb one flight of stairs"
label variable rstoopa   "Motor skills: stoop/kneel/crouch"
label variable rarmsa    "Motor skills: reaching above shoulder"
label variable rpusha    "Motor skills: pulling/pushing large objects"
label variable rlifta    "Motor skills: lifting 5kg"
label variable rdimea    "Motor skills: picking up a small coin"
* Activities of daily living
label variable rdressa   "ADLs: Dressing"
label variable rwalkra   "ADLs: Walking across a room"
label variable rbatha    "ADLs: Bathing or showering"
label variable reata     "ADLs: Eating"
label variable rbeda     "ADLs: Getting in or out of bed"
label variable rtoilta   "ADLs: Using the toilet"
* Instrumental activities of daily living
label variable rmapa     "IADLs: Using a map"
label variable rmealsa   "IADLs: Preparing a hot meal"
label variable rshopa    "IADLs: Shopping for groceries"
label variable rphonea   "IADLs: Making telephone calls"
label variable rmedsa    "IADLs: Taking medications"
label variable rmoneya   "IADLs: Managing money"


// Survey year
* One nominal year per survey wave: HRS and ELSA are linear in wave; SHARE is 2004 for wave 1 and
* 2003 + 2*wave from wave 2 onwards. Any other survey/wave combination is left missing.
generate year = cond(survey=="HRS",               1990 + 2*wave, ///
                cond(survey=="ELSA",              2000 + 2*wave, ///
                cond(survey=="SHARE" & wave==1,   2004,          ///
                cond(survey=="SHARE" & wave>=2,   2003 + 2*wave, .))))
label variable year "Survey year (for whole survey wave)"


*_____________________________________________________________________________________________________________________________________________________
*
**# SURVEY THINGS - WEIGHTS, PROXIES
* Moved to top June 2023, so that xtile can be weighted
*_____________________________________________________________________________________________________________________________________________________

// Survey weights - set to 1 for each survey-wave that exists
* Observations without a usable weight are removed first
drop if missing(rwtresp) | rwtresp==0
generate rwtresp2 = .
* Rescale within each country-wave by dividing by that country-wave's mean weight
levelsof country_waveN, local(cw_ids)
foreach cw of local cw_ids {
	display "Country-wave `cw'"
	summarize rwtresp if country_waveN==`cw'
	replace rwtresp2 = rwtresp/`r(mean)' if country_waveN==`cw'
}
label variable rwtresp2  "Survey weight with mean=1 for each country-wave"
label variable rwtresp   "Original survey weight from RAND file (pop weights except for ELSA)"
label variable rscwtresp "Original self-completion weight (ELSA only)"


// Proxies
* Builds rproxyR, a proxy-interview indicator that adds the SHARE module-level proxy information
* (rproxy_ep, rproxy_ph) to the general rproxy flag. proxy_share is a temporary helper and is dropped at the end.
* Labelling
label var rproxy	"Whether interview done by proxy"
* In ELSA and HRS any proxy interview (code 1) is moved to code 2
replace rproxy = 2 if rproxy==1 & inlist(survey,"ELSA","HRS")
* NOTE(review): this only edits the label set named PROXY; presumably that is the set attached to rproxy - confirm
label define PROXY 1"1.partial proxy" 2"2.full proxy", modify
* SHARE dv
* Priority coding: each replace below overrides the ones above it wherever its condition holds
* NOTE(review): only the first line is restricted to survey=="SHARE"; presumably rproxy_ep/rproxy_ph are only populated for SHARE - confirm
label define proxy_share 0"0_r all modules" 1"1_r for ph&ep modules" 2"2_r for 1 module, DK/missing for other" 11"11_r&proxy for ph and/or ep" 12"12_proxy only for ph&ep", replace
	gen 	proxy_share = 0 if rproxy==0 & survey=="SHARE"
	replace proxy_share = 1 if rproxy!=0 & rproxy_ep==1 & rproxy_ph==1
	replace proxy_share = 2 if inlist(1,rproxy_ep,rproxy_ph) & (inlist(rproxy_ep,.,-1,-2) | inlist(rproxy_ph,.,-1,-2) ) 
	replace proxy_share = 12 if rproxy_ep==3 | rproxy_ph==3
	replace proxy_share = 11 if inlist(2,rproxy_ep,rproxy_ph)
	replace proxy_share = .d if rproxy_ep==-1 & rproxy_ph==-1
	replace proxy_share = .r if rproxy_ep==-2 & rproxy_ph==-2
	recode proxy_share (.=.u) if survey=="SHARE"			// unknown - completely missing for both (or 1 var refused, the other missing)
	label values proxy_share proxy_share
	drop rproxy_??
* NOTE(review): two rules below map to the target value 2 but carry different label texts ("1_..." and "2_..."),
* and value 1 is only ever created by the replace that follows - check that the resulting value labels are as intended
recode rproxy (0=0 "0_not proxy")(1=2 "1_proxy but not on ep or ph modules")(99=2 "2_proxy on ep or ph")(2=3 "3_proxy on all modules"), gen(rproxyR)
	replace rproxyR = 1 if proxy_share==1
	label var rproxyR "dv Whether interview done by proxy (with added SHARE info)"
drop proxy_share


*_____________________________________________________________________________________________________________________________________________________
*
**# FUNCTIONAL LIMITATIONS 
*_____________________________________________________________________________________________________________________________________________________

// Combining vars from HRS into ELSA/SHARE, esp. around the can't do / don't do spontaneous responses - see full notes in OneNote
* In every group below the HRS item is recoded in place, copied into the matching ELSA/SHARE variable
* (same name with an "a" suffix) for HRS observations only, and then dropped.
rename rwalk1 rwalk100

* Group 1 - most IADLs: HRS asked follow-ups after 'cant do'/'dont do' about whether this was due to ill-health,
* so code 2 counts as a limitation and code 9 as no limitation
local ADLvars1 		"meals shop phone meds money"
foreach item of local ADLvars1 {
	recode r`item' (2=1) (9=0) if survey=="HRS"
	replace r`item'a = r`item' if survey=="HRS"
	drop r`item'
}

* Group 2 - tasks nearly everyone does: 'don't do' is taken to mean 'can't do', so codes 2 and 9 both count as a limitation
local motorvars2	"sit chair stoop arms push lift dime"
local ADLvars2 		"dress walkr bath eat bed toilt"
foreach item in `motorvars2' `ADLvars2' {
	recode r`item' (2=1) (9=1) if survey=="HRS"
	replace r`item'a = r`item' if survey=="HRS"
	drop r`item'
}

* Group 3 - tasks that a non-negligible number of Americans plausibly really don't do: code 9 becomes missing (.d)
local motorvars3	"walk100 clims clim1"
local ADLvars3		"map"
foreach item in `motorvars3' `ADLvars3' {
	recode r`item' (2=1) (9=.d) if survey=="HRS"
	replace r`item'a = r`item' if survey=="HRS"
	drop r`item'
}


// Combined variables
* Any-of / count-of summaries over the ADL, IADL and motor-skill items. A summary is set to missing
* whenever at least one of its component items is missing.
* The full "a"-suffixed names are spelled out: the un-suffixed HRS originals were dropped above, so the
* short names only resolved through variable abbreviation (and break under -set varabbrev off-).
* Each *miss helper is dropped first if it is left over from an earlier run, rather than wrapping egen in
* -capture-, which would silently carry on with stale values.
capture drop ADLmiss
capture drop IADLmiss
capture drop motormiss

* ADLs (6 items)
egen anyADL   = anymatch(rdressa rwalkra rbatha reata rbeda rtoilta), values(1)
egen countADL = anycount(rdressa rwalkra rbatha reata rbeda rtoilta), values(1)
	egen ADLmiss = rowmiss(rdressa rwalkra rbatha reata rbeda rtoilta)
	replace anyADL=. if ADLmiss>0
	replace countADL=. if ADLmiss>0
	label var anyADL 	"ADLs: Any of 6 ADLs"
	label var countADL 	"ADLs: Count of 6 ADLs"

* IADLs (5 items, map use excluded)
egen anyIADL   = anymatch(rmealsa rshopa rphonea rmedsa rmoneya), values(1)
egen countIADL = anycount(rmealsa rshopa rphonea rmedsa rmoneya), values(1)
	egen IADLmiss = rowmiss(rmealsa rshopa rphonea rmedsa rmoneya)
	replace anyIADL=. if IADLmiss>0
	replace countIADL=. if IADLmiss>0
	recode countIADL (0=0)(1=1)(2/5=2 "2 or more"), gen(anyIADLt)
	label var anyIADL 	"IADLs: Any of 5 IADLs (exc maps)"
	label var anyIADLt	"IADLs: 0/1/2+ of 5 IADLs (exc maps)"
	label var countIADL "IADLs: Count of 5 IADLs (exc maps)"

* Motor skills (9 items, climbing several flights excluded)
egen anymotor   = anymatch(rwalk100a rsita rchaira rclim1a rstoopa rarmsa rpusha rlifta rdimea), values(1)
egen countmotor = anycount(rwalk100a rsita rchaira rclim1a rstoopa rarmsa rpusha rlifta rdimea), values(1)
	egen motormiss = rowmiss(rwalk100a rsita rchaira rclim1a rstoopa rarmsa rpusha rlifta rdimea)
	replace anymotor=. if motormiss>0
	replace countmotor=. if motormiss>0
	label var anymotor 		"Motor skills: Any of 9 skills"
	label var countmotor 	"Motor skills: Count of 9 skills"
drop ADLmiss IADLmiss motormiss


// Dropping other vars I'm not using
* Removes two unused variables, and deletes the value-label set YESNOW from the dataset
* (the label set is deleted, not the variable that uses it)
drop rwalks rjog 
label drop YESNOW		// this is the label for rwld, which gets in the way of outputting column titles in the bootstrap

/* Binary SRH (no longer used)
tab1 rshlt, gen(rshlt)
recode rshlt (4 5=1 "1_fair or poor")(1/3=0 "0_good vgood excellent"), gen(rshltB)
	label var rshltB	"Self-reported health: binary (fair or poor)"
order rshlt?, after(rshlt)
*/

	
// Combined walking/climbing variables
* Anyone who struggles with one flight of stairs is assumed to struggle with several as well
replace rclimsa = 1 if rclim1a==1

* Three-level climbing variable: 0 = no difficulty, 1 = several flights only, 2 = one flight
label define rclimT 0 "0_No" 1 "1_Cant climb several flights of stairs" 2 "2_Cant climb one flight of stairs"
clonevar rclimT = rclimsa
replace rclimT = 2 if rclim1a==1
label values rclimT rclimT
label variable rclimT "Motor skills: climb one(=2) OR several flights of stairs (=1)"

* Three-level walking variable: 0 = no difficulty, 1 = 100m/a block only, 2 = across a room
label define rwalkT 0 "0_No" 1 "1_Cant walk 100m or a block" 2 "2_Cant walk across a room"
clonevar rwalkT = rwalk100a
* Only 2 people have .d on the room item; copying it across keeps the variables consistent for sample selection
replace rwalkT = .d if rwalkra==.d
replace rwalkT = 2 if rwalkra==1
label values rwalkT rwalkT
label variable rwalkT "Motor skills: walk across room (=2) OR 100m (=1)"

	
// Vision and hearing
* Harmonises the self-rated hearing (rhehear) and eyesight (rheeye, rhefrnd = far, rhepap = near) items
* across surveys, then derives:
*   hearingT - 3-category hearing (from rhehear only)
*   visionR  - worst of near/far sight on a 2-5 scale
*   visionT  - 3-category version of visionR
foreach var in rhefrnd rhepap	{	// overlap in codes between surveys, which needs disambiguating
	recode `var' (-1=.n) if survey=="ELSA"
/**/							}
mvdecode rhehear rheeye rsee_screen_* rhefrnd rhepap, mv(-9 -2 9=.r \ -8 -1 8=.d)
replace rheeye = rsee_screen_HRS if survey=="HRS"
	label define heeye 1 "excellent" 2"very good" 3"good" 4"fair" 5"poor" 6"SPONTANEOUS:blind", replace
	label values rheeye heeye
	drop rsee_screen_HRS
	replace rhefrnd = .n if rheeye==6  & survey=="HRS"		// different coding in HRS, so making this consistent - but different screening in SHARE
	replace rhepap   = .n if rheeye==6  & survey=="HRS"		// different coding in HRS, so making this consistent - but different screening in SHARE
label define vision 1"1_excellent" 2"2_very good" 3"3_good" 4"4_fair" 5"5_poor/impossible(blind)", replace
	recode rhefrnd rhepap (.n 6=5)							// 4 ppl in Poland oddly coded to rhefrnd/rhepap==6, assuming this is people who spontaneously said they were blind
	label values rhefrnd rhepap vision
label define hearing 1"1_excellent" 2"2_very good" 3"3_good" 4"4_fair" 5"5_poor", replace
	label values rhehear hearing
	label var rhehear		"Hearing SR excellent-poor"
recode rhehear (5=3 "3_poor")(4=2 "2_fair")(1/3=1 "1_good/v good/excellent"), gen(hearingT)
	* Label corrected: hearingT comes from rhehear alone, not from the vision items rhefrnd/rhepap
	label var hearingT		"Hearing SR poor/fair/other responses (from rhehear)"
label define visionR 5"5_poor/impossible for near and/or far-sight" 4"4_fair for near and/or far-sight" 3"3_good for near and/or far-sight" 2"2_v good or excellent for near & far-sight", replace
	* Defined using priority coding to make syntax easier
	* (start at the best category when both items are observed, then let each worse category overwrite it;
	*  finally, if either item is missing, its missing code replaces whatever was set)
	gen 	visionR = 2 if !missing(rhefrnd,rhepap)
	replace visionR = 3 if inlist(3,rhefrnd,rhepap)
	replace visionR = 4 if inlist(4,rhefrnd,rhepap)
	replace visionR = 5 if inlist(5,rhefrnd,rhepap)
	replace visionR = rhefrnd if missing(rhefrnd)
	replace visionR = rhepap if missing(rhepap)
	label values visionR visionR
	label var visionR		"dv Combined vision variable (rhefrnd rhepap combined)"
recode visionR (5=3 "3_poor/impossible for near and/or far-sight")(4=2 "2_fair for near and/or far-sight")(1/3=1 "1_good/v good/excellent for near or far sight"), gen(visionT)
	label var visionT		"dv Combined vision variable (rhefrnd rhepap combined)"
	
	
*_____________________________________________________________________________________________________________________________________________________
*
**# OTHER HEALTH/DISABILITY VARS
*_____________________________________________________________________________________________________________________________________________________

// LLSI
* Two activity-limitation indicators; negative codes become extended missings (-2 -> .r, -1 -> .d)
* High-prevalence version: rllsiA_B with only the missing codes changed (value labels are correct on rllsiA)
recode rllsiA_B (-2=.r) (-1=.d), generate(llsiH)
label variable llsiH "Limited activities - high prevalence inc. non-severe (SHARE only)"
* Low-prevalence version: only code 1 of rllsiA counts as limited
recode rllsiA (-2=.r) (-1=.d) (1=1 "1_Severely limited") (2/3=0 "0_other response"), generate(llsiL)
label variable llsiL "Limited activities - low prevalence, severe only (SHARE only)"

	
// Cognition vars
label variable rimrc  "Cognition: immediate word recall (0-10)"
label variable rdlrc  "Cognition: delayed word recall (0-10)"
label variable rverbf "Cognition: SHARE ONLY: verbal fluency (0-100)"
* HRS uses different missingness codes - .s are proxy respondents this wave, .x are proxy or NR at all waves
recode rimrc (.s=.p) (.x=.m) if survey=="HRS"
recode rdlrc (.s=.p) (.x=.m) if survey=="HRS"
* HRS imputed scores are blanked out (.i), because ELSA and SHARE scores are not imputed
replace rimrc = .i if rfimrc==1 & survey=="HRS"
replace rdlrc = .i if rfdlrc==1 & survey=="HRS"
drop rfimrc rfdlrc
* Binary bottom-tertile indicators for the IRT models, with weighted tertiles taken among under-60s and among under-65s
foreach score in rimrc rdlrc {
	* Under-60s reference group
	xtile `score'T = `score' if (ragey_e<60 | ragey<60) [pw=rwtresp2], nquantiles(3)
	recode `score'T (1=1 "1_bottom tertile") (2/3=0), generate(`score'B)
	label variable `score'B "Cognition: bottom tertile **of U60s** for `score'"
	* Under-65s reference group
	xtile `score'T2 = `score' if (ragey_e<65 | ragey<65) [pw=rwtresp2], nquantiles(3)
	recode `score'T2 (1=1 "1_bottom tertile") (2/3=0), generate(`score'B2)
	label variable `score'B2 "Cognition: bottom tertile **of U65s** for `score'"
}
* The intermediate tertile variables are no longer needed
drop r??rcT*


// MH vars - CESD and EuroD combined
* Caseness, as in Courtin et al: CESD of 3+ (ELSA, HRS) or Euro-D of 4+ (SHARE)
recode rcesd  (0/2=0) (3/max=1 "1_depression caseness") if inlist(survey, "ELSA", "HRS"), generate(rmhcase_cesd)
recode reurod (0/3=0) (4/max=1 "1_depression caseness") if inlist(survey, "SHARE"), generate(rmhcase_eurod)
clonevar rmhcase = rmhcase_cesd
replace rmhcase = rmhcase_eurod if survey=="SHARE"

* Scale, as in Riumallo-Herl: raw scores plus z-scores, each scale standardised on its own pooled mean and sd
clonevar rmhscale_cesd = rcesd if inlist(survey, "ELSA", "HRS")
summarize rmhscale_cesd
generate rmhscale_cesdZ = (rmhscale_cesd - `r(mean)') / `r(sd)'
clonevar rmhscale_eurod = reurod if inlist(survey, "SHARE")
summarize rmhscale_eurod
generate rmhscale_eurodZ = (rmhscale_eurod - `r(mean)') / `r(sd)'

* Stitch the survey-specific versions together (the CESD version is the base, SHARE rows take Euro-D)
clonevar rmhscale = rmhscale_cesd
replace rmhscale = rmhscale_eurod if survey=="SHARE"
clonevar rmhscale_Z = rmhscale_cesdZ
replace rmhscale_Z = rmhscale_eurodZ if survey=="SHARE"

* Final tidying up: label the combined variables, then drop the item-level and survey-specific inputs
label variable rmhscale   "Depression symptoms (raw CESD/Euro-D combined)"
label variable rmhscale_Z "Depression symptoms (normalised CESD/Euro-D combined)"
label variable rmhcase    "Depression caseness (CESD/Euro-D combined)"
drop rcesd rcesdm rdepres reffort renlife rflone rfsad rgoing rsleepr rwhappy rappett rconcnt rdepress renjoym reurod rfeurod rfatig ///
	rguilt rintrst rirritb rpessim rsleep rsuicid rtearfl rmhcase_* rmhscale_cesd* rmhscale_eurod*


// Grip strength
* Produces two binary low-grip indicators: rmaxgripB (raw maximum grip) and rmaxgrip_residB (grip net of
* height and weight). All grip inputs and intermediates are dropped at the end.
* SHARE-only - height & weight adjusted
* The model is fitted on SHARE observations with ragey<60: height plus a quadratic in weight, all interacted with rmale
reg rmaxgrip (c.rheight c.rweight##c.rweight)##i.rmale if ragey<60 & survey=="SHARE"
*predict rmaxgrip_pred if e(sample), xb
* Residuals are only produced for the estimation sample, so they are missing for every other observation
predict rmaxgrip_resid if e(sample) & survey=="SHARE", residuals
* Carrying the height/weight extended-missing codes over to the residual (later lines win if several apply)
replace rmaxgrip_resid = .m if (rheight==.m | rweight==.m)
replace rmaxgrip_resid = .i if (rheight==.i | rweight==.i)
replace rmaxgrip_resid = .d if (rheight==.d | rweight==.d)
replace rmaxgrip_resid = .r if (rheight==.r | rweight==.r)
* Quartile-based
* NOTE(review): these quartiles are weighted by rwtresp, whereas the cognition tertiles earlier in this file use
* the rescaled rwtresp2 - confirm that the difference is intended
foreach var in rmaxgrip rmaxgrip_resid {
	xtile `var'T=`var' if (ragey_e<60 | ragey<60) [pw=rwtresp], nquant(4) 
		* Anyone flagged by rgs_no_inj or rgs_no_safe is put in the bottom quartile, whatever their xtile result
		replace `var'T = 1 if rgs_no_inj==1 | rgs_no_safe==1
	recode `var'T (1=1 "1_bottom quartile grip / no GS due to health or safety")(2/4=0), gen(`var'B)
	label var `var'B "GS: low grip strength (bottom quartile or no obs due to health)"
/**/ }
drop rmaxgrip*T 
drop rmaxgrip rmaxgrip_resid rgs*


	
*_____________________________________________________________________________________________________________________________________________________
*
**# BMI
*_____________________________________________________________________________________________________________________________________________________

// BMI (self-reported from SHARE)
* BMI is computed as weight / height squared into a helper variable and copied into rbmi for SHARE rows only
generate rbmi_SHARE = rweight/(rheight^2)
replace rbmi = rbmi_SHARE if survey=="SHARE"
* Only the helper is removed; rbmi, rheight and rweight are deliberately kept
drop rbmi_SHARE


// BMI - carried forward
// originally done with height and weight, but there are no cases whatsoever where it helps to use height or weight without the other
* Fills gaps in rbmi from the same person's other waves and records where each value came from in rbmi_flag:
*   0 = observed, 11-16 = taken from 1-6 waves earlier, 21-26 = taken from 1-6 waves later, 99 = nothing available.
* Earlier waves take priority over later ones, and nearer waves over more distant ones.
tsset mergeidnum wave
* Leftovers from a previous run are dropped one pattern at a time: a single -drop- listing several patterns
* removes nothing at all if any one of the patterns has no match
foreach stub in rbmi rheight rweight {
	capture drop `stub'_L?
	capture drop `stub'_F?
}
capture drop rbmi_flag
forvalues k = 1/6 {
	// Note that I only have HRS from wave 7, so can only go back 5 waves in any survey at this moment
	gen rbmi_L`k' 	= L`k'.rbmi
	gen rbmi_F`k' 	= F`k'.rbmi
}
* Now creating the best BMI variable we can do
* The flag (set before rbmi itself is touched, so that it reflects the originally observed values)
gen rbmi_flag = .
forvalues k = 1/6 {
	replace rbmi_flag = 10 + `k' if missing(rbmi) & ~missing(rbmi_L`k') & missing(rbmi_flag)
}
forvalues k = 1/6 {
	replace rbmi_flag = 20 + `k' if missing(rbmi) & ~missing(rbmi_F`k') & missing(rbmi_flag)
}
replace rbmi_flag = 99 if missing(rbmi) & missing(rbmi_flag)
replace rbmi_flag = 0  if ~missing(rbmi)
* The variable (same priority order as the flag: lags 1-6, then leads 1-6)
forvalues k = 1/6 {
	replace rbmi = rbmi_L`k' if missing(rbmi) & ~missing(rbmi_L`k')
}
forvalues k = 1/6 {
	replace rbmi = rbmi_F`k' if missing(rbmi) & ~missing(rbmi_F`k')
}
* Labelling the variable
label define rbmi_flag 0 "0_observed value" 99 "99_no estimate possible", modify
forvalues i = 1/6 {
	label define rbmi_flag 1`i' "1`i'_`i' wave(s) earlier", modify
	* Fixed: the text for the 2x codes used to start with "1x_", so it contradicted the value it was attached to
	label define rbmi_flag 2`i' "2`i'_`i' wave(s) later", modify
}
label values rbmi_flag rbmi_flag
label var rbmi_flag "rbmi carryforward flag: rbmi copied from a different wave"
order rbmi_flag, after(rbmi)
*tab rbmi_flag survey
*browse mergeid* survey_waveN rbmi*	
drop rbmi_L? rbmi_F?


// RBMI categorical
* Standard cut-points: <18.5 underweight, 18.5-<25 normal, 25-<30 overweight, 30+ obesity.
* The rules are listed from the top category down with shared boundaries. recode applies the first matching
* rule, so each boundary value goes to the higher category and no value between two ranges is left unrecoded
* (the previous 18.4999 / 24.9999 / 29.9999 upper limits left small gaps for continuous BMI values).
* I'm deriving this myself; it was checked as identical to the RAND version
recode rbmi (30/max=4 "4_obesity")(25/30=3 "3_overweight")(18.5/25=2 "2_normal weight")(0/18.5=1 "1_underweight"), gen(rbmicat)
label var rbmicat "BMI category 1-4"


*_____________________________________________________________________________________________________________________________________________________
*
**# EMPLOYMENT & EDUCATION
*_____________________________________________________________________________________________________________________________________________________

// Employment - main version
* rworknew is my bespoke SHARE measure built to match HRS-ELSA; for ELSA and HRS it simply takes rwork
replace rworknew = rwork if inlist(survey, "ELSA", "HRS")


// Employment - self-reported labour force status, for Boheim-Leoni collaboration
label variable rlbrf   "Labour force status (HRS)"
label variable rlbrf_e "Labour force status (ELSA)"
label variable rlbrf_s "Labour force status (SHARE)"
* Working / not-working split of each survey's own status variable
recode rlbrf   (1 2 4=1 "1_status is working") (3 5/7=0 "0_other status"), generate(rwork_lbrf)
recode rlbrf_e (1 2 4=1 "1_status is working") (3 5/7=0 "0_other status"), generate(rwork_lbrf_e)
recode rlbrf_s (1=1 "1_status is working") (3 5/8=0 "0_other status"), generate(rwork_lbrf_s)
* The HRS-based variable is the base; ELSA and SHARE rows take their own survey's version
replace rwork_lbrf = rwork_lbrf_e if survey=="ELSA"
replace rwork_lbrf = rwork_lbrf_s if survey=="SHARE"
label variable rwork_lbrf "Work status: working as SR labour force status"
* At least for the time being
drop rwork_lbrf_? rlstat?


// Employment - positive earnings
* rwork_earningsT: 0 = no earnings, 1 = positive employment earnings (riearn or ritearn),
*                  2 = positive self-employment income (risemp or ritsemp) and no employment earnings,
*                  missing when all four income variables are missing.
* rwork_earnings:  binary version (1 or 2 -> 1), blanked out for ELSA and for SHARE wave 2.
gen rwork_earningsT = (riearn>0 & ~missing(riearn)) | (ritearn>0 & ~missing(ritearn)) 
	replace rwork_earningsT = 2 if rwork_earningsT~=1 & ( (risemp>0 & ~missing(risemp)) | (ritsemp>0 & ~missing(ritsemp)) )
	replace rwork_earningsT = . if missing(riearn) & missing(ritearn) & missing(risemp) & missing(ritsemp)
	label define rwork_earningsT 0 "0_no earnings income" 1 "1_emp income" 2 "2_semp income"
	label values rwork_earningsT rwork_earningsT 
	label var rwork_earningsT "Working: from past year earnings (sep emp & self-emp)"
	recode rwork_earningsT (0=0)(1 2=1), gen(rwork_earnings)
	* This label is overwritten by the final -label var- further down
	label var rwork_earnings "Working: from past year earnings (binary)"
	/* Originally got rid of imputed earnings - but actually there's no reason to do this:
	*replace rwork_earnings = .o if (survey=="HRS"  & inlist(rifearn, 2, 9)) 		| (survey=="SHARE" & inlist(rifearn, 3, 5, 7))
	*replace rwork_earnings = .o if 												| (survey=="SHARE" & inlist(ritfearn, 3, 5, 7))		// ELSA removed: (survey=="ELSA" & inlist(ritifearn, 3, 5, 7)) 	
	*/
	replace rwork_earnings = . if survey=="ELSA" 				// ELSA excluded as doesn't have individual-level self-emplyoment earnings
	replace rwork_earnings = . if survey=="SHARE" & wave==2 	// For some reason working people seem to be estimated to have zero earnings much more in this wave - some unknown error here so taken out 
	label var rwork_earnings "Work status: any earnings (inc imputed values)"
* The wildcard removes every variable whose name starts with r and ends in earn
drop r*earn
	
	
// Employment - hours-related (positive hours, split by hours of work)
* Any employment - positive hours
* rjhourstot is the sum of main-job and second-job hours, treating a missing component as 0 unless both are
* missing (in which case the missing code of rjhours is kept). It is set to .w when rwork==0.
replace rjhours 	= round(rjhours,1) if survey=="SHARE"		// to make consistent
replace rjhour2 	= round(rjhour2,1) if survey=="SHARE"		// to make consistent
replace rjhour2_e 	= round(rjhour2_e,1) if survey=="ELSA"		// to make consistent
gen rjhourstot = cond(~missing(rjhours),rjhours,0) + cond(~missing(rjhour2),rjhour2,0) if (survey=="HRS" | survey=="SHARE")
	replace rjhourstot = rjhours if missing(rjhours) & missing(rjhour2) & (survey=="HRS" | survey=="SHARE")
	replace rjhourstot = cond(~missing(rjhours), rjhours,0) + cond(~missing(rjhour2_e),rjhour2_e,0) if survey=="ELSA"
	replace rjhourstot = rjhours if missing(rjhours) & missing(rjhour2_e) & (survey=="ELSA")
	replace rjhourstot = .w if rwork==0
	label var rjhourstot "Hours of work per wk, 1st + 2nd jobs"
* Employment - split by hours of work	
* The (-99=0) rule exists only to define the value label for category 0, which is assigned by the replace below.
* Bands are listed from the top down with shared boundaries. recode applies the first matching rule, so whole
* hours land exactly where they did before, while fractional totals (e.g. 7.5 or 15.5, possible wherever hours
* were not rounded above) now fall into the band below the boundary instead of being left unrecoded.
recode rjhourstot (-99=0 "0_not working")(30/max=4 "4_full-time (30hrs+/wk)")(16/30=3 "3_large pt (16-29hrs/wk)")(8/16=2 "2_small pt (8-15hrs/wk)")(0/8=1 "1_mini-job (<8hrs/wk)"), gen(rworktype)
	replace rworktype = 0 if rworknew==0			// Otherwise quite a lot of missingness in hours
	label var rworktype "Type of job: mini-job to full-time (4cats)"
* The wildcard also removes rjhourstot itself; only rworktype is kept
drop *hour*	


// Education
* risced3: 3-band education (1 low, 2 medium, 3 high) from a different source variable in each survey:
*   HRS   - years of education (raedyrs): 0-11 / 12-15 / 16-17
*   SHARE - raiscednew: 0-2 / 3-4 / 5-6
*   ELSA  - raedisced: .n / 3-4 / 5
* Because of the if-condition, the recode leaves risced3 missing outside HRS; the replaces then fill SHARE and ELSA
recode raedyrs (0/11=1 "1_low")(12/15=2 "2_medium")(16/17=3 "3_high") if survey=="HRS", gen(risced3)
	replace risced3 = 1 if survey=="SHARE" & inlist(raiscednew, 0, 1, 2)
	replace risced3 = 2 if survey=="SHARE" & inlist(raiscednew, 3, 4)
	replace risced3 = 3 if survey=="SHARE" & inlist(raiscednew, 5, 6)
	* NOTE(review): ELSA "low" is keyed on the extended missing code .n rather than on ISCED 0-2; presumably .n
	* marks respondents with no qualification - confirm against the raedisced codebook
	replace risced3 = 1 if survey=="ELSA"  & inlist(raedisced, .n)
	replace risced3 = 2 if survey=="ELSA"  & inlist(raedisced, 3, 4)
	replace risced3 = 3 if survey=="ELSA"  & inlist(raedisced, 5)
	label var risced3 "Education (ISCED, 3-banded)"
	* Copying over missing value codes
	* (only where risced3 is still missing, so the .n -> 1 assignment above is not undone)
	replace risced3 = raedisced  if survey=="ELSA"   & missing(raedisced)  & missing(risced3)
	replace risced3 = raiscednew if survey=="SHARE"  & missing(raiscednew) & missing(risced3)
drop r*edisced* raiscednew
// raedyrs* was dropped here, but is now used under the Boheim-Leoni sociodemographics where they wanted a years of education variable


*_____________________________________________________________________________________________________________________________________________________
*
**# DISABILITY BENEFITS
*_____________________________________________________________________________________________________________________________________________________

// Disability benefits, from reported claims
* Labels the current-receipt indicators and, for ELSA only, fills rbennow_sick from rbennow_ssp and builds
* the combined incapacity-or-sickness indicator rbennow_incap_sick.
label var rbennow_incap 	"Current incapacity ben receipt (main version)"
label var rbennow_incap3  	"HRS only: Current SSI/SSDI/vet ben"
label var rbennow_ssdi    	"HRS only: Current SSDI (exc SSI)"
label var rbennow_vet		"HRS only: Current veterans bens (inc mths status)"
label var rbennow_costs   	"ELSA only: Current extra costs ben (DLA/AA/(w7)PIP)"
label var rbennow_incap2  	"ELSA only: Current incapacity-RELATED ben (IIDB/WDP+IB/SDA/(w5)ESA)"
label var rbennow_other   	"ELSA only: Current other health-related ben (mainly ICA/CA)"
label var rbennow_sick     	"ELSA & SHARE: Current sickness ben"
	replace rbennow_sick = rbennow_ssp if survey=="ELSA"
label var rbennow_incap_sick "ELSA & SHARE: Current incap OR sickness ben"
	replace rbennow_incap_sick = 1 if (rbennow_incap==1 | rbennow_sick==1 ) & survey=="ELSA"
	replace rbennow_incap_sick = 0 if (rbennow_incap==0 & rbennow_sick==0 ) & survey=="ELSA"
	* NOTE(review): the next two lines run last, so an ELSA case with one benefit ==1 and the other missing ends
	* up missing rather than 1 - confirm that this conservative treatment is intended
	replace rbennow_incap_sick = rbennow_incap if missing(rbennow_incap) & survey=="ELSA"
	replace rbennow_incap_sick = rbennow_sick  if missing(rbennow_sick)  & survey=="ELSA"
* Labelling & tidying
label define bennow 0 "0_not claiming" 1 "1_current claim"
* NOTE(review): rbennow_ssp is given the value label and then dropped on the next line, while rbennow_sick
* (which now holds its ELSA values) is not in this list - check whether rbennow_sick was meant instead
label values rbennow_incap* rbennow_costs rbennow_other rbennow_ssp bennow
drop rbennow_ssp


// Disability benefits, from reported income
* Cleaning: the Borsch-Supan version (w1 only) is not used
drop rbeninc1_dis
label variable rbeninc2_dis  "Dis ben income for ALL of last year (my preferred version)"
label variable rbeninc3_dis  "Dis ben income last year (RAND version inc imputations)"
label variable rbenstat2_dis "Mths of dis ben income last year (my preferred version)"
* All existing notes in the dataset are cleared before the caveats on the RAND version are attached
notes drop _all
notes rbeninc3_dis: not consistent between waves for AT DE ES (w1v6 includes sick ben, but w2-5 does not)
notes rbeninc3_dis: includes sickness benefit in only some countries (SE, ES, DK, CZ)


// Other (non-disability) benefits, from reported income
* Both indicators start at 0 wherever rbeninc3_dis is observed outside ELSA, and are switched to 1 by any
* positive amount in the relevant income variables
* Other benefits (including unemployment)
generate rbeninc3_other = 0 if ~missing(rbeninc3_dis) & survey~="ELSA"
replace rbeninc3_other = 1 if (riunwc>0 & ~missing(riunwc)) | (rigxfr>0 & ~missing(rigxfr))   | (ritgxfr>0 & ~missing(ritgxfr))
label variable rbeninc3_other "Other benefit (inc unemp) income last year (RAND version inc imputations)"
* Pensions (state or employer)
generate rbeninc3_pens = 0 if ~missing(rbeninc3_dis) & survey~="ELSA"
replace rbeninc3_pens = 1 if (ripena>0 & ~missing(ripena)) | (ritpena>0 & ~missing(ritpena)) | (ritsret>0 & ~missing(ritsret)) | (risret>0 & ~missing(risret))
label variable rbeninc3_pens "Pension (state or employer) income last year (RAND version inc imputations)"
* Pattern variable: one group per observed combination of the three indicators, with code 2 of the
* disability variable folded into 1 first (on a temporary copy)
clonevar rbeninc3_dis2 = rbeninc3_dis
recode rbeninc3_dis2 (2=1)
egen rbeninc3_all = group(rbeninc3_dis2 rbeninc3_other rbeninc3_pens), label truncate(1)
label variable rbeninc3_all "Pattern of R disben/othben/pens income last year (RAND version inc imputations)"
*numlabel rbeninc3_all, mask(#_) add
drop rbeninc3_dis2
/* CHECKS
	tab country rbeninc3_other if ragey<60 | ragey_e<60, row nof
	tab country rbeninc3_pens if ragey<60 | ragey_e<60, row nof
	tab country rbeninc3_dis if ragey<60 | ragey_e<60, row nof
	tab country rbeninc3_all if ragey<60 | ragey_e<60, row nof
	bysort rwork_earnings: 	tab country rbeninc3_all if ragey<60 | ragey_e<60, row nof
*/
* Income components and other inputs that are no longer needed
drop riunwc rigxfr ritgxfr ripena ritpena ritsret risret hitot hittot rifsdi rifssdi rifssi rifwcmp risdi risemp rissdi rissdi_e rissi ritfssdi ///
	ritsemp ritssdi riwcmp rjweek* rsymptom8

	
*_____________________________________________________________________________________________________________________________________________________
*
**# BOHEIM-LEONI VARS ADDED NOV 2017, for within-country trends, but not comparable between countries
*_____________________________________________________________________________________________________________________________________________________

* Moves the Boheim-Leoni health variables to the end of the dataset, in exactly this order.
* The rhepain-rhepawot range used by a later drop relies on this ordering.
order rhosp rdoctim rnrshom rhomcar rhosp1y rdoctim1y rhomcar1y rnrshom1y rhibpe rstroke rdiabe rlunge rarthre rcancre rback rback2 rhearte rhepain rhepawal rhepawba rhepawfe rhepawhi rhepawkn rhepawmo rhepawot rph084_ rph087d1 rph087d2 rph087d3 rph087d4 rph087d5 rph087d6 rph087d7, last

// Medical and social care (not ELSA)
* For SHARE the main variables are overwritten with their 12-month (*1y) counterparts, which are then dropped
label var rhosp 	"hospital stays last 2yrs (SHARE: hospital stays last 12 months)"
label var rhomcar 	"home care in last 2yrs (SHARE: home care in last 12 months)"
label var rdoctim 	"number of doctors seen in last 2yrs (originally in PWV (2013): r*doctor, have seen doctor in last 12 months; in SHARE: nr of doctor visits in last 12 months)"
label var rnrshom 	"nursing home in last 2yrs (SHARE: nursing home in last 12 months"
foreach var in rhosp rhomcar rdoctim rnrshom {
	replace `var' = `var'1y if survey=="SHARE"
/**/ }
* The wildcard removes every variable whose name ends in 1y
drop *1y


// Chronic conditions
* Step 1: a short condition name is stored as each variable's label
label variable rhibpe   "high blood pressure"
label variable rstroke  "stroke"
label variable rdiabe   "diabetes or high blood sugar"
label variable rlunge   "chronic lung disease"
label variable rarthre  "arthritis"
label variable rcancre  "cancer"
label variable rhearte  "heart problems"
label variable rpsyche  "affective or emotional disorders"
label variable rasthmae "asthma"
label variable rhchole  "high blood cholestrol"
label variable rparkine "Parkinson disease"
label variable rcatrcte "cataracts"
label variable rhipfeme "hip or femoral fracture"
* Step 2: every one of those labels is prefixed with "Ever diagnosed: "
foreach cond_var in rhibpe rstroke rdiabe rlunge rarthre rcancre rhearte rpsyche rasthmae rhchole rparkine rcatrcte rhipfeme {
	local base_label : variable label `cond_var'
	label variable `cond_var' "Ever diagnosed: `base_label'"
}


// Back problems
* rback is harmonised across surveys below; rback2 is only labelled here.
label var rback 	"problems with your back (SHARE & ELSA: bothered by backpain)"
* Stata stores at most 80 characters of a variable label. Full description of rback2:
* CAUTION: problems with your back (HRS: inc carry-forward for repeat interviewees
* for alternate waves).
label var rback2 	"CAUTION: back problems (HRS: carry-forward for repeat interviewees, alt. waves)"

* SHARE w5-6
label var rph087d1         "Pain location: back"
label var rph087d2         "Pain location: hips"
label var rph087d3         "Pain location: knees"
label var rph087d4         "Pain location: other joints"
label var rph087d5         "Pain location: mouth/teeth"
label var rph087d6         "Pain location: other parts of the body but not joints"
label var rph087d7         "Pain location: all over"
* Start from rph084_ (1 -> 1, 5 -> 0, -2 -> .r, -1 -> .d) ...
recode rph084_ (-2=.r)(-1=.d)(1=1)(5=0), gen(rback_SHARE)
* ... then set to 0 when none of back/hips/knees/other joints is reported
replace rback_SHARE = 0 if (rph087d1==0 & rph087d2==0 & rph087d3==0 & rph087d4==0)
* Negative codes on the back item become extended missings
replace rback_SHARE = .d if rph087d1==-1
replace rback_SHARE = .r if rph087d1==-2
replace rback = rback_SHARE if survey=="SHARE" & inlist(wave, 5, 6)
drop rph084_ rph087d? rback_SHARE

* SHARE w1-4
replace rback = rsymptom1 if survey=="SHARE" & inlist(wave, 1, 2, 4)

* ELSA
label var rhepain	"Whether often troubled with pain"
label var rhepawba	"Whether feel pain in back"
label var rhepawhi	"Whether feel pain in hips"
label var rhepawkn	"Whether feel pain in knees"
label var rhepawfe	"Whether feel pain in feet"
label var rhepawmo	"Whether feel pain in mouth or teeth"
label var rhepawot	"Whether feel pain elsewhere"
label var rhepawal	"Whether feel pain all over"
* Same construction as for SHARE: overall pain item first, then 0 when none of
* back/hips/knees is reported
recode rhepain (-9=.r)(-8=.d)(-1=.n)(1=1)(2=0), gen(rback_ELSA)
replace rback_ELSA = 0 if (rhepawba==0 & rhepawhi==0 & rhepawkn==0)
replace rback_ELSA = .d if rhepawba==-8
* -9 is treated as .r in the recode of rhepain above; apply the same to the back
* item so both negative codes are handled, as in the SHARE branch
replace rback_ELSA = .r if rhepawba==-9
replace rback = rback_ELSA if survey=="ELSA"
* Explicit list instead of a positional range, so the drop does not depend on
* the current variable order
drop rhepain rhepawal rhepawba rhepawfe rhepawhi rhepawkn rhepawmo rhepawot rback_ELSA


*_____________________________________________________________________________________________________________________________________________________
*
**# BOHEIM-LEONI SOCIODEMOGRAPHIC VARS ADDED NOV 2017
*_____________________________________________________________________________________________________________________________________________________

* Age in months
* HRS rows take the end-of-interview version
replace ragem = ragem_e if survey=="HRS"
label var ragem "Age in months, HRS/SHARE only (at end of interview in HRS)"
drop ragem_e

* Year and month of birth
label var rabyear 	"Year of birth"
label var rabmonth	"Month of birth (HRS/SHARE only)"
* ELSA rows take year of birth from rindobyr, after turning its negative codes
* into extended missings
mvdecode rindobyr, mv(-7=.o \ -1=.n)
replace rabyear = rindobyr if survey=="ELSA"
drop rindobyr

* Children
* ELSA rows take the respondent-level count
replace hchild = rchild if survey=="ELSA"
* Stata stores at most 80 characters of a variable label, so this one is kept short
label var hchild "Number of children (natural+adopted), household (HRS/SHARE)/respondent (ELSA)"
drop rchild

* Years of education
* -table, contents()- is the older table syntax, so run this one command under
* version control. The prefix form avoids changing the interpreter version for
* the rest of the do-file.
version 14: table survey, c(count raedyrs count raedend)
* ELSA: map the categories of raedend to years of schooling (top-coded at 14);
* "not yet finished" is parked at -99 and then set to .o
recode raedend (-9=.r)(-8=.d)(-1=.n)(1=-99 "not yet finished")(2=0 "0_none")(3=9)(4=10)(5=11)(6=12)(7=13)(8=14 "14. 14+yrs"), gen(raedyrs_e)
	replace raedyrs_e = .o if raedyrs_e==-99
	label var raedyrs_e "Years of schooling, max 14 (ELSA)"
* SHARE: move the values into their own variable and blank them in raedyrs,
* which is then labelled as the HRS measure
clonevar raedyrs_s = raedyrs if survey=="SHARE"
	label var raedyrs_s "Years of schooling, max 21 (SHARE)"
	replace raedyrs = . if survey=="SHARE"
label var raedyrs		"Years of schooling, max 17 (HRS)"
drop raedend


* Marital status (added for v3-3 Dec 2017)
label var rmstat "Marital status"


* Tenure & last job
label var rjcten 	"Tenure on current job"
label var rjlasty	"Last job: year"
label var rjlastm	"Last job: month (HRS & ELSA only)"



*_____________________________________________________________________________________________________________________________________________________
*
**# FINAL THINGS
*_____________________________________________________________________________________________________________________________________________________

* Numeric version of the survey identifier
encode survey, gen(surveyN)

* HRS rows take the end-of-interview age
replace ragey = ragey_e if survey=="HRS"
	drop ragey_e

* Age centred at 55: positive above 55, negative below.
* NOTE(review): this was previously computed as 55 - ragey, i.e. with the opposite
* sign to what the label describes. The squared term is unaffected, but the sign of
* any linear age coefficient estimated downstream will flip - confirm against
* existing results.
gen rage55 = ragey - 55
	label var rage55   "Age centred at 55"
gen rsqage55 = rage55^2
	label var rsqage55 "Age squared, centred at 55"

* Variable order: alphabetical first, then the thematic groups below are pulled
* into place one after another
order _all, sequential
order survey surveyN mergeid mergeidnum survey_waveN wave year riwy cid country* inw rmn024_ rmn101_ rproxy* rwtresp2 rwtresp rscwtresp wnurwt rmale ragey ragem rage55 rsqage55 rabyear rabmonth rmstat hchild , first
order rwalk100a rsita rchaira rclimsa rclim1a rstoopa rarmsa rpusha rlifta rdimea anymotor countmotor, after(hchild)
order rdressa rwalkra rbatha reata rbeda rtoilta anyADL countADL rmapa rmealsa rshopa rphonea rmedsa rmoneya anyIADL* countIADL, after(countmotor)
order r??rc* rverbf rmh* rmaxgrip* rsymptom* rbmi rbmi_flag rheight rweight rbmicat, after(countIADL)
order rhosp rdoctim rnrshom rhomcar rhibpe rhchole rstroke rhearte rdiabe rlunge rasthmae rcatrcte rcancre rparkine rhipfeme rhipe rarthre rback rback2 rpsyche, after(rbmicat)
order rwork rlbrf* /* rlstat? */ rwork* rjcten rjlasty rjlastm	rben* risced3 raedyrs*, last
compress


// Regimes (from Excel sheet in OECD data folder)
* Each classification is a look-up from the country code to a regime number.
* Countries not listed in a classification are set to missing.
* Code 30 (Ireland) is kept in the look-ups to mirror the source sheet, although
* those observations are dropped earlier in this file.

* Halvorsen et al 2017
recode country									///
	(13 14 31             = 1)					///
	(11 12 15 17 18 20 23 = 2)					///
	(16 33 47             = 3)					///
	(34 61                = 4)					///
	(28 29 32 35          = 5)					///
	(19 30                = 6)					///
	(else                 = .)					///
	, gen(disregime_halv)

* Boheim & Leoni 2016
recode country									///
	(51 61                   = 1)				///
	(11 12 13 14 18 20       = 2)				///
	(15 16 17 23 29 30 31 33 = 3)				///
	(else                    = .)				///
	, gen(disregime_boheim)

* OECD 2010 + Scharle 2015
recode country									///
	(14 18 20          = 1)						///
	(12 13             = 2)						///
	(61                = 3)						///
	(51                = 4)						///
	(11 23 32          = 5)						///
	(17 19 29 31       = 6)						///
	(15 16 28 30 33 34 = 7)						///
	(else              = .)						///
	, gen(disregime_oecd)

* Variable labels (these replace the automatic "RECODE of ..." labels)
label var disregime_halv 	"Disability policy regime - Halvorsen et al 2017"
label var disregime_boheim 	"Disability policy regime - Boheim & Leoni 2016"
label var disregime_oecd 	"Disability policy regime - OECD 2010+Scharle 2015"

* Value labels; -modify- on all three so an existing definition does not stop the run
label define halv 1 "High spenders, weak mtbs" 2 "High spenders, strong mtbs" 3 "Mixed-high cash, low services, strong mtbs" ///
		4 "Mixed-low cash, high services, variable mtbs" 5 "Low spenders, weak mtbs" 6 "Low spenders, strong mtbs", modify
	label values disregime_halv halv
label define boheim 1 "Low compensation + medium integration"  2 "High integration + high compensation" 3 "Medium compensation + low integration", modify
	label values disregime_boheim boheim
label define oecd 1 "Soc-Dem A" 2 "Soc-Dem B" 3 "Lib-A" 4 "Lib-B" 5 "Corp-A" 6 "Corp-B" 7 "Corp-C", modify
	label values disregime_oecd oecd
* Prefix each value label with its numeric code, e.g. "1_..."
numlabel halv boheim oecd, mask(#_) add
order disregime*, after(country2)

/*
use "${workingdata}\HRS-SHARE-ELSA_BBG_${versno}.dta", replace
*/